# -*- coding: utf-8 -*-
import copy
import random
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from cnncmall.rules import *
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects the category tree and SKU listings of cnncmall.com.

    Flow:
      1. ``start_requests`` posts to the category endpoint.
      2. ``parse_catalog`` emits the categories and opens page 1 of the SKU
         search for every level-3 category.
      3. ``parse_sku_list`` handles page 1 of a query: it either schedules the
         remaining pages, or - when the query has too many pages - re-issues
         the query narrowed by vendor and then by brand.
      4. ``parse_more_sku`` handles pages 2..N of a query.
    """
    name = 'sku'
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        # 'DOWNLOAD_DELAY': 0.1,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }

    # Frequently used endpoints
    cat_url = "https://www.cnncmall.com/pesapp/mall/noauth/queryGoodsCategory"
    sku_list_url = 'https://www.cnncmall.com/cnnc/mall/noauth/searchGoods'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and its paging limits.

        :param batchNo: batch number forwarded to ``BaseSpider``
            (presumably the source of ``self.batch_no`` - confirm in the base class).
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # A query reporting this many pages or more is split by vendor/brand
        # instead of being paged through directly.
        self.max_page_limit = 250
        # Number of items requested per search page.
        self.page_size = 40

    def start_requests(self):
        """Yield the single request that fetches the category tree."""
        yield Request(
            method='POST',
            url=self.cat_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            headers={
                'Accept': 'application/json, text/plain, */*',
                'Content-Type': 'application/json;charset=UTF-8',
                'Connection': 'keep-alive',
                'auth-token': '123',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
            },
            body=json.dumps({"redisKey": "1001", "index": True}),
            callback=self.parse_catalog,
            errback=self.error_back,
            dont_filter=True
        )

    def parse_catalog(self, response):
        """Emit the parsed categories and start a SKU query per level-3 category.

        Yields a ``Box`` tagged ``'catalog'`` when categories were parsed, then
        one page-1 search request (handled by ``parse_sku_list``) for every
        entry of the to-do list whose ``level`` is 3.
        """
        cats, todo_cat_list = parse_catalog(response)
        if cats:
            self.logger.info('品类: count[%s]', len(cats))
            yield Box('catalog', self.batch_no, cats)

        for cat in todo_cat_list or ():
            # Only level-3 categories are searched.
            if cat.get('level') != 3:
                continue
            yield self._build_req(
                page=1,
                callback=self.parse_sku_list,
                cat3_id=cat.get('catalogId'),
                sort=1,
            )

    def _build_req(self, page, callback, cat3_id, sort=None, brand=None, vendor=None):
        """Build a SKU search request for one page of a level-3 category.

        :param page: page number sent as ``pageNo``.
        :param callback: callback that will receive the response.
        :param cat3_id: level-3 category id sent as ``categoryId``.
        :param sort: value for ``orderType``; the default of 1 is kept when None.
        :param brand: optional brand filter value.
        :param vendor: optional vendor (supplier) filter value.
        :return: a ``Request``, or ``None`` when ``cat3_id`` is falsy.
        """
        if not cat3_id:
            return None

        query = {
            "queryStr": "",
            "queryChannelId": "1001",
            "categoryId": cat3_id,
            "catalogName": "",
            "level": 3,
            "brandId": "",
            "supplierShopId": "",
            "orderByColumn": "3",
            "orderType": 1,
            "queryParams": [],
            "agreementId": "",
            "minSalesPrice": 0,
            "maxSalesPrice": "",
            "orgPath": "",
            "userId": "",
            "pageSize": self.page_size,
            "pageNo": page,
            "activityId": "",
            "doContract": False,
            "companyId": "",
            "isprofess": "",
            "doActity": False,
            "doSelect": 0,
            "commodityTypeIds": [],
            "rsGoodsSearch": False
        }
        # Sort order
        if sort is not None:
            query["orderType"] = sort
        # Brand filter
        if brand:
            query["queryParams"].append({
                "filterId": "brand_id_name",
                "filterName": "品牌",
                "filterValues": [str(brand)],
                "categoryId": ""
            })
        # Vendor (supplier) filter
        if vendor:
            query["queryParams"].append({
                "filterId": "vendor_id_name",
                "filterName": "供应商",
                "filterValues": [str(vendor)],
                "categoryId": ""
            })

        # Random priority in [10, 1000) so requests are not scheduled in
        # strict creation order.
        priority = random.randrange(10, 1000)
        return Request(
            method='POST',
            url=self.sku_list_url,
            body=json.dumps(query),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'cat3Id': cat3_id,
                'vendor': vendor,
                'brand': brand,
                'sort': sort,
            },
            headers={
                'Host': 'www.cnncmall.com',
                'Connection': 'keep-alive',
                'Accept': 'application/json, text/plain, */*',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
                'auth-token': '123',
                'Content-Type': 'application/json;charset=UTF-8',
                'Origin': 'https://www.cnncmall.com',
                'Sec-Fetch-Site': 'same-origin',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Dest': 'empty',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
            },
            callback=callback,
            errback=self.error_back,
            priority=priority,
            dont_filter=True
        )

    def parse_sku_list(self, response):
        """Handle page 1 of a SKU query and decide how to fetch the rest.

        Yields the SKUs/items found on this page. Then:
          * if the query has fewer than ``max_page_limit`` pages, schedules
            pages 2..N with ``parse_more_sku`` as callback;
          * otherwise re-issues the query once per vendor (if no vendor filter
            is active yet), else once per brand (if no brand filter is active
            yet), each handled again by this method;
          * if both filters are already active (or no filter values are
            available), logs that the query is abandoned.
        """
        meta = response.meta
        cat3_id = meta.get("cat3Id")
        cur_page = meta.get("page")
        cur_sort = meta.get("sort", None)
        cur_vendor = meta.get("vendor", None)
        cur_brand = meta.get("brand", None)

        total_pages = parse_total_page(response)
        sku_list, item_list = parse_sku_list(response)
        if not sku_list:
            self.logger.info('空页: cat=%s, page=%s', cat3_id, cur_page)
            return

        self.logger.info('清单1: cat=%s, page=%s, count=%s', cat3_id, cur_page, len(sku_list))
        yield Box('sku', self.batch_no, sku_list)
        yield Box('item', self.batch_no, item_list)

        if total_pages < self.max_page_limit:
            # Small enough to page through directly, no extra filter needed
            # (under max_page_limit * page_size items, i.e. 10000 with the defaults).
            self.logger.info('页数：cat=%s, ttp=%s', cat3_id, total_pages)
            for page in range(2, total_pages + 1):
                yield self._build_req(page=page, callback=self.parse_more_sku, cat3_id=cat3_id,
                                      sort=cur_sort, vendor=cur_vendor, brand=cur_brand)
            return

        # Too many pages: narrow the query with an additional filter.
        vendor_list, brand_list = parse_query_info(response)
        if vendor_list and not cur_vendor:
            # First narrowing step: one query per vendor.
            for vendor_item in vendor_list:
                self.logger.info('加参1：cat=%s, vendor=%s', cat3_id, vendor_item)
                yield self._build_req(page=1, callback=self.parse_sku_list, cat3_id=cat3_id,
                                      sort=cur_sort, vendor=vendor_item, brand=cur_brand)
        elif brand_list and not cur_brand:
            # Second narrowing step: one query per brand.
            for brand_item in brand_list:
                self.logger.info('加参2：cat=%s, vendor=%s, brand=%s', cat3_id, cur_vendor, brand_item)
                yield self._build_req(page=1, callback=self.parse_sku_list, cat3_id=cat3_id,
                                      sort=cur_sort, vendor=cur_vendor, brand=brand_item)
        else:
            # Nothing left to narrow by; only page 1 of this query is collected.
            self.logger.info('超限放弃1：cat=%s, vendor=%s, brand=%s, ttp=%s',
                             cat3_id, cur_vendor, cur_brand, total_pages)

    def parse_more_sku(self, response):
        """Handle pages 2..N of a SKU query: emit its SKUs/items or log an empty page."""
        meta = response.meta
        cat3_id = meta.get("cat3Id")
        cur_page = meta.get("page")
        cur_vendor = meta.get("vendor", None)
        cur_brand = meta.get("brand", None)

        sku_list, item_list = parse_sku_list(response)
        if sku_list:
            self.logger.info('清单2: cat=%s, vendor=%s, brand=%s, page=%s, count=%s',
                             cat3_id, cur_vendor, cur_brand, cur_page, len(sku_list))
            yield Box('sku', self.batch_no, sku_list)
            yield Box('item', self.batch_no, item_list)
        else:
            # Argument order matches the placeholders: cat, vendor, brand, page.
            self.logger.info('空页: cat=%s, vendor=%s, brand=%s, page=%s',
                             cat3_id, cur_vendor, cur_brand, cur_page)


